************************************************************
************************************************************
**														  **
**	Tanzania, 2010-11									  **
**										   				  **
**										   				  **
************************************************************
************************************************************

clear
version 14
set more off
cap log close

gl IN 	"${DATADRIVE}\LSMS-ISA\2010-11"
gl OUT 	"${LOCDRIVE}\Results"
gl TEMP	"${LOCDRIVE}\Tempdata"

loc hh y2_hhid
loc ea ea
loc weight y2_weight
loc strata strataid
loc admin0 macroregion
loc admin1 region 		
loc admin2 district  	
loc admin3 ward
loc indiv indidy2

set matsize 10000

loc admin0_base = $admin0_base
loc admin1_base = $admin1_base
loc admin2_base = $admin2_base
loc admin3_base = $admin3_base

loc gooddisagglist $gooddisagglist 
loc servicedisagglist $servicedisagglist

log using "${TEMP}\DataPrep_Tanzania_R2", replace


*****************************
**** Create Sample Files ****
*****************************

use  "${IN}\HH_SEC_A.dta", clear
sort `hh'

rename y2_rural urban
recode urban (0=1) (1=0) /*was coded as 0 urban 1 rural*/
label define yesno 1 yes 0 no
label values urban yesno

keep `hh' `ea' `weight' `strata' `admin1' `admin2' `admin3' urban
sort `hh'

gen macroregion=1 if region==1 | region==13		 /*central*/
replace macroregion=2 if region==2 | region==3 | region==4 | region==21	/* northern*/
replace macroregion=3 if region==5 | region==6 | region==7		/*eastern*/
replace macroregion=4 if region==8 | region==9 | region==10	 /*southern*/
replace macroregion=5 if region==11 | region==12 | region==15	/*southern highlands*/
replace macroregion=6 if region==14 | region==16 | region==17	/*western*/
replace macroregion=7 if region==18 | region==19 | region==20	/*lake*/
replace macroregion=8 if region>=51 & region<=55	/*zanzibar*/

tab macroregion,m
label define macroregion 1 "Central" 2 "Northern" 3 "Eastern" 4 "Southern" 5 "Southern Highlands" 6 "Western" 7 "Lakes" 8 "Zanzibar"
label values macroregion macroregion 


tempfile sample
save `sample', replace

g count = 1
collapse (sum) count, by(`ea' `admin0' `admin1' `admin2' `admin3' urban)
drop count
sort `admin0' `admin1' `admin2' `admin3' `ea'
egen ea_c = group(`admin0' `admin1' `admin2' `admin3' `ea')
tempfile sample_ea
save `sample_ea', replace

g count = 1
collapse (sum) count (median) urban_adm3=urban, by(`admin0' `admin1' `admin2' `admin3')
drop count
recode urban_adm3 (0.5=1)
sort `admin0' `admin1' `admin2' `admin3'
egen admin3_c = group(`admin0' `admin1' `admin2' `admin3')
su admin3_c if `admin0' == `admin0_base' & `admin1' == `admin1_base' & `admin2' == `admin2_base' & `admin3'==`admin3_base', detail
loc admin3_cbase = r(p50)
display "`admin3_cbase'"
tempfile sample_admin3
save `sample_admin3', replace

g count = 1
collapse (sum) count, by(`admin0' `admin1' `admin2')
drop count
sort `admin0' `admin1' `admin2' 
egen admin2_c = group(`admin0' `admin1' `admin2')
su admin2_c if `admin0' == `admin0_base' & `admin1' == `admin1_base' & `admin2' == `admin2_base', detail
loc admin2_cbase = r(p50)
display "`admin2_cbase'"
tempfile sample_admin2
save `sample_admin2', replace

g count = 1
collapse (sum) count, by(`admin0' `admin1')
drop count
sort `admin0' `admin1' 
egen admin1_c = group(`admin0' `admin1')
su admin1_c if `admin0' == `admin0_base' & `admin1' == `admin1_base', detail
loc admin1_cbase = r(p50)
display "`admin1_cbase'"
tempfile sample_admin1
save `sample_admin1', replace

g count = 1
collapse (sum) count, by(`admin0')
drop count
sort `admin0' 
egen admin0_c = group(`admin0')
su admin0_c if `admin0' == `admin0_base', detail
loc admin0_cbase = r(p50)
display "`admin0_cbase'"
tempfile sample_admin0
save `sample_admin0', replace
*


** Create a community level merge -- this is messy

use `sample', clear
merge m:1 `admin0' `admin1' `admin2' `admin3' `ea' using `sample_ea', nogen keepusing(ea_c)
merge m:1 `admin0' `admin1' `admin2' `admin3' using `sample_admin3', nogen keepusing(admin3_c)
merge 1:1 `hh' using "${IN}\y2commlink.dta", nogen keep(1 3)
egen id_03_c = group(id_03 id_02 id_01)
drop if id_01==.
tempfile commlink_hh
save `commlink_hh', replace

collapse (median) admin3_c `admin3' `admin2' `admin1' `admin0' (min) min_adm3=admin3_c (max) max_adm3=admin3_c (sd) sd_adm3=admin3_c, by(id_03_c id_03 id_02 id_01) 

levelsof id_03_c if min_adm3 != max_adm3  , loc(checklist)
*& !( id03max== id_03_c | id03min== id_03_c)

/* weird ones
browse if min_adm3 != max_adm3
id_01	id_02	id_03	id_03_c	admin3_c	min_adm3	max_adm3	sd_adm3
Dar es Salaam	1	12	6	181	181	243	16.54365
Dar es Salaam	2	12	8	209	209	221	4.24264
Iringa	6	12	13	367	350	375	7.030546
Rukwa	3	13	18	426	426	427	.3333333
Arusha	4	13	19	63	63	65	1.032796
Kigoma	2	21	27	474	474	479	1.767767
Kusini Unguja	2	22	35	658	657	659	.4714045
Mwanza	3	22	37	621	620	621	.3535534
Morogoro	5	22	39	148.5	148	152	1.414214
Mjini Magharibi	1	23	41	662	662	675	4.30439
Dar es Salaam	2	23	42	210	209	223	3.324154
Iringa	6	23	44	368	368	369	.3535534
Kaskazini Pemba	2	31	49	707	707	711	1.333333
Dar es Salaam	3	31	50	227	227	246	4.592193
Singida	3	31	52	35	32	35	1.224745
Kigoma	3	31	53	481	481	482	.5175492
Tanga	6	31	57	108	108	110	.9718253
Ruvuma	1	33	58	323	323	325	.6456057
Mtwara	5	33	60	317	281	317	16.66476
Kilimanjaro	2	41	64	75	73	75	.6666667
Kilimanjaro	5	41	69	85	84	86	.5
Shinyanga	6	41	70	520	520	523	1
Singida	4	42	73	40	40	43	1.38873
Mwanza	8	43	75	586	586	624	16.6256
Kaskazini Pemba	2	51	79	709	708	709	.3333333
Kagera	3	51	80	553	553	554	.5
Dar es Salaam	1	52	82	185	182	208	10.93161
Kilimanjaro	1	61	85	68.5	68	69	.5345225
Rukwa	3	61	90	427	427	428	.5175492
Ruvuma	5	61	92	327	326	327	.3535534
Dar es Salaam	1	62	97	186	185	208	9.342257
Mjini Magharibi	2	62	99	684	679	698	4.886489
Kaskazini Unguja	1	71	101	642	642	643	.4082483
Mjini Magharibi	1	71	102	666	666	669	1.552647
Morogoro	4	71	106	144	144	145	.3535534
Dar es Salaam	1	72	114	187	183	187	1.157869
Dar es Salaam	3	72	115	229	170	229	26.3856
Mwanza	3	72	116	586	209	620	228.1104
Tabora	2	81	122	442	442	444	.6666667
Mwanza	2	81	123	582	582	594	3.618136
Rukwa	4	81	124	432	430	432	.6030227
Lindi	5	81	125	275	273	276	.6685579
Dar es Salaam	1	82	126	188	182	242	15.67285
Morogoro	3	83	131	138	137	138	.4409586
Manyara	4	83	134	125	47	125	15.92168
Shinyanga	8	83	136	531.5	531	532	.5345225
Ruvuma	2	91	138	329	329	331	1
Manyara	2	91	140	118	117	118	.3535534
Kaskazini Pemba	2	91	142	712	704	712	2.666667
Arusha	3	91	144	56	51	56	1.767767
Manyara	3	91	146	119	119	120	.5270463
Morogoro	6	91	150	157	155	157	.9759001
Dar es Salaam	1	92	151	189	189	231	10.07168
Dar es Salaam	2	92	152	216	210	225	3.408545
Morogoro	5	93	154	151	151	159	3.829708
Iringa	6	93	155	373	368	373	1.619328
Pwani	4	101	161	171	171	173	.7559289
Shinyanga	8	101	165	534	533	534	.3333333
Dar es Salaam	1	102	166	190	190	221	9.346851
Mjini Magharibi	2	102	169	688	668	688	7.440238
Dar es Salaam	3	102	170	232	232	243	3.050851
Arusha	3	103	172	57	57	58	.3535534
Kilimanjaro	5	103	174	87	46	87	21.6089
Kaskazini Unguja	2	111	178	647	647	649	.6666667
Kagera	6	111	183	574	573	574	.48795
Dar es Salaam	1	112	185	191	191	231	10.37224
Tabora	6	112	188	464	462	464	.7071068
Mbeya	8	113	195	411	410	411	.3162278
Kilimanjaro	1	121	196	70	70	71	.3535534
Mbeya	1	121	199	381	381	382	.3333333
Iringa	2	121	201	355	355	356	.3535534
Singida	4	121	203	42	41	42	.3535534
Mjini Magharibi	2	122	204	690	679	690	3.889087
Mtwara	5	123	211	321	288	321	8.467304
Lindi	6	123	212	280	254	280	6.257349
Shinyanga	7	123	213	528	503	528	7.513624
Mjini Magharibi	1	131	217	654	654	669	5.561774
Lindi	2	131	218	259	258	260	.6009252
Kaskazini Pemba	2	131	219	715	708	715	2.20101
Kagera	3	131	221	556	556	557	.3333333
Kusini Pemba	1	132	222	723	723	724	.3333333
Rukwa	2	133	227	422	405	422	5.375872
Dodoma	3	133	229	10	8	10	.8555853
Kagera	2	141	236	547	547	571	8.413424
Kilimanjaro	4	141	238	82	81	90	3.796145
Kagera	5	141	239	568	567	568	.4082483
Arusha	3	142	240	59	59	62	1.38873
Dar es Salaam	3	142	241	236	232	239	1.885092
Manyara	5	143	245	11	11	126	56.11425
Tabora	1	151	247	436	436	437	.5345225
Mbeya	3	151	249	392	389	392	1.035725
Lindi	4	151	250	265	265	272	3.24037
Shinyanga	6	151	252	522	495	522	8.140806
Dar es Salaam	1	152	253	196	186	196	5
Kusini Pemba	1	152	254	725	720	725	1.75119
Mjini Magharibi	2	152	255	691	667	691	8.262732
Kusini Pemba	1	161	258	726	721	726	1.767767
Singida	2	161	260	33	33	38	2.5
Kaskazini Unguja	2	161	261	649	648	650	1.069045
Kaskazini Pemba	1	162	263	705	705	710	1.658312
Dar es Salaam	1	163	264	197	196	197	.25
Mjini Magharibi	1	163	265	670	661	674	2.103075
Mbeya	8	163	267	414	413	414	.3535534
Kusini Pemba	1	171	269	727	718	727	3
Dodoma	2	171	270	5	5	6	.48795
Singida	3	173	277	37	37	460	141
Kagera	1	181	279	541	541	542	.5
Dar es Salaam	3	182	284	240	212	240	6.669852
Dar es Salaam	1	183	285	199	199	207	2.656845
Dar es Salaam	1	192	294	200	130	205	18.82306
Mjini Magharibi	2	192	295	693	670	693	8.083591
Dar es Salaam	3	192	296	241	220	241	7
Ruvuma	1	193	297	325	323	325	1.069045
Mwanza	4	201	301	596	596	618	9.647638
Dar es Salaam	3	202	305	242	213	242	7.578164
Mtwara	3	203	308	305	300	305	1.666667
Dar es Salaam	1	212	312	202	202	221	7.181325
Mwanza	6	213	314	611	611	612	.4082483
Dar es Salaam	1	222	317	203	182	215	7.542527
Mjini Magharibi	1	222	318	674	661	699	7.712297
Dar es Salaam	3	222	319	244	231	244	3.084664
Dar es Salaam	1	232	323	204	196	206	2.828427
Singida	1	241	325	30	30	31	.5175492
Tanga	4	241	329	104	103	104	.3535534
Dar es Salaam	1	242	330	205	205	208	1.410487
Mjini Magharibi	1	243	331	674	661	676	4.772607
Kagera	1	271	339	543	543	544	.4409586
Mwanza	6	281	343	559	559	613	27.88548
Mjini Magharibi	1	283	344	680	677	680	1.06066
Dodoma	5	291	346	18	17	26	3.796145
Rukwa	1	302	348	421	420	421	.438529
Dodoma	4	311	352	15	15	25	4.6291
Kilimanjaro	4	311	353	80	80	83	1.581139
Mjini Magharibi	2	312	354	696	687	697	4.257347
Mjini Magharibi	2	332	355	697	662	697	10.77877
*/

use `commlink_hh', clear
foreach id_c in `checklist' {
    display "`id_c'"
	tabulate admin3_c if id_03_c==`id_c', sort matrow(mat)
	replace admin3_c=mat[1,1] if id_03_c==`id_c'
	foreach l in 3 2 1 0 {
	    qui: su `admin`l'' if admin3_c==mat[1,1], detail
	    replace `admin`l'' = r(p50) if id_03_c==`id_c'
		}
	}
	
collapse (median) admin3_c `admin3' `admin2' `admin1' `admin0', by(id_03_c id_03 id_02 id_01) 

tempfile comm_to_adm3_link
save `comm_to_adm3_link', replace
save "${TEMP}/Community_merge_r2.dta", replace


** Household sample file

use `sample', clear
merge m:1 `admin0' `admin1' `admin2' `admin3' `ea' using `sample_ea', nogen keep(1 3) keepusing(ea_c)
merge m:1 `admin0' `admin1' `admin2' `admin3' using `sample_admin3', nogen keep(1 3) keepusing(admin3_c)
merge m:1 `admin0' `admin1' `admin2' using `sample_admin2', nogen keep(1 3) keepusing(admin2_c)
merge m:1 `admin0' `admin1' using `sample_admin1', nogen keep(1 3) keepusing(admin1_c)
merge m:1 `admin0' using `sample_admin0', nogen keep(1 3) keepusing(admin0_c)

save "${TEMP}/Sample_links_R2.dta", replace 



****************************
**** Expenditures FOOD  ****
****************************

** First generate month_year from the hh interview date

use  "${IN}\HH_SEC_A.dta", clear

rename hh_a18_year year_int
rename hh_a18_month month_int

keep `hh' year_int month_int

sort year_int month_int
*egen month_year = group(year_int month_int)

tempfile intdate
save `intdate', replace


use "${IN}\HH_SEC_K1.dta", clear
label list hh_k01_2
tab hh_k01_2,m
recode hh_k01_2 2=0 

label list hh_k02_1
/*1 KILOGRAMS 2 GRAMS 3 LITRE 4 MILLILITRE 5 PIECES*/

rename 	(hh_k01_2 	hh_k02_1 	hh_k02_2  	hh_k03_1 		hh_k03_2  		hh_k04 	hh_k05_1  	hh_k05_2 	hh_k06_1  			hh_k06_2) ///
		(eat 		unit_total 	q_total 	unit_purchase 	q_purchase 		spend 	unit_prod 	q_prod 		unit_gift 			q_gift)
 
duplicates report `hh' itemcode 
foreach i in total purchase prod gift {
   recode unit_`i' 0=.
   }
   
*data consistency check
gen ck0=(q_total==0 | q_total==.) if eat==1  //1 obs found (purchased and showed expenditure but not q_purchase or q_total) hh is 0903001016003601 and item is 601

tab ck0
g unitv = spend/q_purchase 
su unitv if unit_purchase==2 & itemcode==601, detail 
loc pr = r(p50)
replace q_purchase= spend/`pr' if `hh'=="0903001016003601" & item==601 // this is the obs that violates ck0. 
replace unit_purchase = unit_total if `hh'=="0903001016003601" & item==601
replace q_total=q_purchase if `hh'=="0903001016003601" & item==601
drop unitv

replace eat=0 if ck0==1 & q_total==0 //no obs
foreach i in total purchase prod gift {  
   replace unit_`i'=. if ck0==1 & q_total==0  //no changes made
   }
   
replace q_total= q_gift if (q_gift!=.) & (q_gift > 0) & q_total == .  //no obs 
gen ck_=(q_total!=.) if eat==0
tab ck_  //no obs 

foreach i in purchase prod gift { 
	gen ck_0`i'=(unit_`i'==. & q_`i' !=.) //unit is missing but q is not missing
	replace ck_0`i'=0 if unit_`i'==. & q_`i' ==0 //unit is missing but q is not 0
	gen ck_1`i'=(unit_`i' !=. & q_`i'==.) //q is missing but unit is not missing
	  
	tab ck_0`i',m 
	tab ck_1`i',m
	}

*fix the errors
replace unit_purchase=unit_total if ck_0purchase==1 //0 change
replace unit_prod=unit_total if ck_0prod==1 //1 change
replace unit_gift=unit_total if ck_0gift==1 //0 change
replace unit_prod=. if ck_1prod==1 //0 change

gen ck_2=0
replace ck_2=1 if spend==0 & q_purchase>0 
tab ck_2 //only one obs
replace unit_gift=unit_total if ck_2==1 
replace q_gift=q_total if ck_2==1 & q_prod==0
	replace q_gift=q_total - q_prod if ck_2==1 & q_prod>0
replace q_purchase=. if ck_2==1
replace unit_purchase=. if ck_2==1
replace spend=. if ck_2==1

*replace missing quantity with 0
foreach i in total purchase prod gift {
	recode q_`i' (.=0)
	}

drop ck0 ck_*

*convert units (grams->kilograms/ millilitre->litre)
tab unit_total

foreach i in total purchase prod gift { 
	replace q_`i'=q_`i'/1000 if unit_`i'==2 | unit_`i'==4 
	replace unit_`i'=1 if unit_`i'==2
	replace unit_`i'=3 if unit_`i'==4
	}
	
tempfile sec_k1_mid
save `sec_k1_mid', replace


*check if units for all sources are equal within an item 
rename (unit_total q_total unit_purchase q_purchase unit_prod q_prod unit_gift q_gift) ///
(unit_1 q_1 unit_2 q_2 unit_3 q_3 unit_4 q_4)
reshape long unit_ q_,i(`hh' itemcode) j(source)
drop if q_==0
by `hh' itemcode, sort: egen sd_= sd(unit_)
g diff= sd_ != . & sd_ > 0 /*these have only one type of cons per hh-item*/
drop sd_
tab diff //5 cases of differing units within a hh-item. These don't seem to be errors.)


** Purchase unit values (expenditures / quantity)
use `sec_k1_mid', clear

gen price_purchase=spend/q_purchase
g ch_=1 if price_purchase==. & q_purchase!=. & q_purchase>0 /*only 2 - q_purchase is same as q_total but spend is just missing. Treat these as gifts instead */
replace q_gift=q_purchase if ch_==1 
replace unit_gift=unit_purchase if ch_==1 
replace q_purchase=. if ch_==1
replace unit_purchase=. if ch==1

sort `hh'
merge m:1 `hh' using `sample'
drop _merge 

tempfile sec_k1_mid
save `sec_k1_mid', replace

keep if q_total>0 & q_total != . 
tempfile food_consumed_consumption
save `food_consumed_consumption', replace


** Generate median prices per kg from price_purchase (only for consumers)

keep if q_purchase > 0 & q_purchase != . 
tempfile food_purchased_consumption
save `food_purchased_consumption', replace

*by urb-region-district-ward-ea all units
preserve
collapse (median) p_purchase_adm4_=price_purchase, by(itemcode unit_purchase urban `admin0' `admin1' `admin2' `admin3' ea)
keep if p_purchase_adm4_ != . //drop missing p drawn from unit_purchase=. 
sort itemcode unit_purchase urban `admin1' `admin2' `admin3' ea
reshape wide p_purchase_adm4_, i(itemcode urban `admin0' `admin1' `admin2' `admin3' ea) j(unit_purchase)
tempfile p_purchase_adm4 
save `p_purchase_adm4', replace
restore

*generate median unit price by urb-region-district-ward
preserve
collapse (median) p_purchase_adm3_=price_purchase, by(itemcode unit_purchase urban `admin0' `admin1' `admin2' `admin3')
keep if p_purchase_adm3_ != .
sort itemcode unit_purchase urban `admin0' `admin1' `admin2' `admin3'
reshape wide p_purchase_adm3_, i(itemcode urban `admin0' `admin1' `admin2' `admin3') j(unit_purchase)
tempfile p_purchase_adm3
save `p_purchase_adm3', replace
restore

*generate median unit price by urb-region-district
preserve
collapse (median) p_purchase_adm2_=price_purchase, by(itemcode unit_purchase urban `admin0' `admin1' `admin2')
keep if p_purchase_adm2_ != .
sort itemcode unit_purchase urban `admin0' `admin1' `admin2'
reshape wide p_purchase_adm2_, i(itemcode urban `admin0' `admin1' `admin2') j(unit_purchase)
tempfile p_purchase_adm2
save `p_purchase_adm2', replace
restore

*generate median unit price by urb-region
preserve
collapse (median) p_purchase_adm1_=price_purchase, by(itemcode unit_purchase urban `admin0' `admin1')
keep if p_purchase_adm1_ != .
duplicates drop
sort itemcode unit_purchase urban `admin0' `admin1' 
reshape wide p_purchase_adm1_, i(itemcode urban `admin0' `admin1') j(unit_purchase)
tempfile p_purchase_adm1
save `p_purchase_adm1', replace
restore

*generate median unit price by macroregion
preserve
collapse (median) p_purchase_adm0_=price_purchase, by(itemcode unit_purchase urban `admin0')
keep if p_purchase_adm0_ != .
duplicates drop
sort itemcode unit_purchase urban `admin0' 
reshape wide p_purchase_adm0_, i(itemcode urban `admin0') j(unit_purchase)
tempfile p_purchase_adm0
save `p_purchase_adm0', replace
restore

*generate median unit price by urban
preserve
collapse (median) p_purchase_urbrur_=price_purchase, by(itemcode unit_purchase urban)
keep if p_purchase_urbrur_ != .
duplicates drop
sort itemcode unit_purchase urban
reshape wide p_purchase_urbrur_, i(itemcode urban) j(unit_purchase)
tempfile p_purchase_urbrur
save `p_purchase_urbrur', replace
restore

*generate median unit price by unit
preserve
collapse (median) p_purchase_unit_=price_purchase, by(itemcode unit_purchase)
keep if p_purchase_unit_ != .
duplicates drop
sort itemcode unit_purchase
reshape wide p_purchase_unit_, i(itemcode) j(unit_purchase)
tempfile p_purchase_unit
save `p_purchase_unit', replace
restore


** Merge prices for different levels from price_purchase with consumption info **

*Notes: In this section only keep housheold who consumed that item
*       Information about non-consuming households is not relevant for unit conversion. 

use `food_consumed_consumption', clear
tab q_total if unit_total == . //0 obs 
rename unit_total unit

keep `hh' itemcode unit ea `admin3' `admin2' `admin1' `admin0' urban  
tempfile price_sample
save `price_sample', replace

merge m:1 itemcode ea `admin3' `admin2' `admin1' `admin0' urban using `p_purchase_adm4'
drop _merge
merge m:1 itemcode `admin3' `admin2' `admin1' `admin0' urban using `p_purchase_adm3' 
drop _merge
merge m:1 itemcode `admin2' `admin1' `admin0' urban using `p_purchase_adm2' 
drop _merge
merge m:1 itemcode `admin1' `admin0' urban using `p_purchase_adm1'
drop _merge
merge m:1 itemcode `admin0' urban using `p_purchase_adm0'
drop _merge
merge m:1 itemcode urban using `p_purchase_urbrur'
drop _merge
merge m:1 itemcode using `p_purchase_unit'
drop _merge

foreach lev in adm4 adm3 adm2 adm1 adm0 urbrur unit {
	foreach u in 1 3 5 {
		winsor2 p_purchase_`lev'_`u' if unit==`u', suffix(imp) by(itemcode) cuts(1 99)
		g dif = p_purchase_`lev'_`u'!= p_purchase_`lev'_`u'imp & p_purchase_`lev'_`u'!=. & unit==`u'
		display "level: `lev'; unit: `u'; p_purchase_`lev'_`u'"
		bysort itemcode: su p_purchase_`lev'_`u' p_purchase_`lev'_`u'imp if dif==1
		replace p_purchase_`lev'_`u' = p_purchase_`lev'_`u'imp if dif==1
		drop dif
		}
	}

drop p_purchase*imp
	
tempfile prices
save `prices', replace


** Now fill in missing prices

use `sec_k1_mid', clear
rename unit_total unit
merge m:m `hh' itemcode unit using `prices'
tab q_total if _merge==1 
tab q_total if _merge==2
drop if _merge==1  // 177,051 observation with no total consumption; these do not matter for the unit conversion. 
drop _merge

* generate median level price 
foreach u in 1 3 5 {
	gen price_`u'_med = p_purchase_adm4_`u' 
	}

foreach u in 1 3 5 {
	* keep median price only if there are 3 or more households that consumed that item with the same unit in the same ea
	bys itemcode unit ea `admin3' `admin2' `admin1' `admin0' urban: egen n_`u'_adm4=count(price_purchase!=.) if unit==`u'
	tab n_`u'_adm4 
	replace price_`u'_med =. if n_`u'_adm4<3
	
	* if ea level is missing, replace with ward level prices
	bys itemcode unit `admin3' `admin2' `admin1' `admin0' urban: egen n_`u'_adm3=count(price_purchase!=.) if unit==`u'
	tab n_`u'_adm3 
	replace price_`u'_med = p_purchase_adm3_`u' if price_`u'_med == . & n_`u'_adm3 >= 3 

	* if ea and ward level are missing, replace with district level prices
	bys itemcode unit `admin2' `admin1' `admin0' urban: egen n_`u'_adm2=count(price_purchase!=.) if unit==`u'
	tab n_`u'_adm2 
	replace price_`u'_med = p_purchase_adm2_`u' if price_`u'_med == . & n_`u'_adm2 >= 3 

	* if ea, ward and district level are missing, replace with region level prices
	bys itemcode unit `admin1' `admin0' urban: egen n_`u'_adm1=count(price_purchase!=.) if unit==`u'
	tab n_`u'_adm1 
	replace price_`u'_med = p_purchase_adm1_`u' if price_`u'_med == . & n_`u'_adm1 >= 3 

	* if ea, ward, district and region level are missing, replace with macro-region level prices
	bys itemcode unit `admin0' urban: egen n_`u'_adm0=count(price_purchase!=.) if unit==`u'
	tab n_`u'_adm0 
	replace price_`u'_med = p_purchase_adm0_`u' if price_`u'_med == . & n_`u'_adm0 >= 3 

	* if ea, ward, district, region and macro-region level are missing, replace with urban/rural level prices
	bys itemcode unit urban: egen n_`u'_urbrur=count(price_purchase!=.) if unit==`u'
	tab n_`u'_urbrur 
	replace price_`u'_med = p_purchase_urbrur_`u' if price_`u'_med == . & n_`u'_urbrur >= 3 

	* if all else missing, replace with unit prices
	bys itemcode unit: egen n_`u'_unit=count(price_purchase!=.) if unit==`u'
	tab n_`u'_unit 
	replace price_`u'_med = p_purchase_unit_`u' if price_`u'_med == . & n_`u'_unit >= 3 
	}

foreach u in 1 3 5 {
	count if price_`u'_med == . & unit==`u'
	** 1 for kg; 3 for liter; 5 for pieces;
	drop if price_`u'_med == . & unit==`u'
	
	winsor2 price_`u'_med if unit==`u', suffix(imp) by(itemcode) cuts(5 95)
	replace price_`u'_med=price_`u'_medimp
	}

drop p_purchase_adm4_1 - p_purchase_unit_5 *medimp
drop n_1_adm4 - n_5_unit

tempfile consumed_consumption
save `consumed_consumption', replace


** Now save price dataset

collapse (median) price_1_med price_3_med price_5_med, by(`admin0' `admin1' `admin2' `admin3' `ea' itemcode)

tempfile prices_purchased
save `prices_purchased', replace


** Convert to most common unit

use `consumed_consumption', clear

count if unit!=1 & unit!=3 //  739 obs with non-standard unit  
* Identify the most common unit for each item. In the final dataset we will convert to kg, for now liters and pieces are still OK
gen kg=1 if unit==1
gen lit=1 if unit==3
gen pie=1 if unit==5
collapse (sum) nkg=kg nlit=lit npie=pie, by(itemcode) // count the number of units of each item

*compare the number of units within item and find the the most common one:
gen common_unit=.
replace common_unit=1 if nkg>=nlit & nkg>=npie
replace common_unit=3 if nlit>nkg & nlit>npie
replace common_unit=5 if npie>nkg & npie>nlit

tab common_unit
count if common_unit==.

label var common_unit "most common unit per item: 1=kg, 3=liter, 5=pieces"
sort common_unit
by common_unit: list itemcode nkg nlit npie // check whether we find the most common unit correctly 

drop nkg nlit npie 

tempfile commonunit_w1
save `commonunit_w1', replace

* Now convert to most common unit
use  `consumed_consumption', clear
merge m:1 itemcode using `commonunit_w1'
drop _m

* Convert unit of total consumption to the most common unit for each item  *
gen nonst = (unit!=1 & unit!=3) // 739 obs
count if nonst==1 & common_unit==1 & price_1_med==. /*103*/
count if nonst==1 & common_unit==3 & price_3_med==. /*0*/
count if nonst==1 & common_unit==5 & price_5_med==. /*0*/

* Use the median price to get an approximate value of total consumption. When consumption reported in non-standard units, we convert using the median price at the EA or higher level. We use this price to get the value of total consumption if there is a non-standard unit of consumption.
g spend_temp = q_total * price_1_med if unit==1
replace spend_temp = q_total * price_3_med if unit==3
replace spend_temp = q_total * price_5_med if unit==5


* For items whose most common unit is kg, covert all units to kg [and for liters, pieces as well]
levelsof itemcode, local(items)
foreach item in `items' {
	foreach u in 1 3 5 {
		replace q_total = spend_temp / price_`u'_med if itemcode==`item' & unit!=`u' & common_unit==`u'
		replace q_purchase = spend_temp / price_`u'_med if itemcode==`item' & unit!=`u' & common_unit==`u'
		replace unit=`u' if itemcode==`item' & common_unit==`u'
		}
	}

tempfile consumed_consumption_mid2_t
save `consumed_consumption_mid2_t', replace


** exclude outliers of quantity per capita consumed (pq_total) and of quantity purchased (pq_purchase) and of price, using interquartile 

merge m:1 `hh' using "${IN}\TZY2.HH.Consumption.dta", nogen keepusing(adulteq hhsize)

g price_med = price_1_med if common_unit==1
replace price_med = price_3_med if common_unit==3
replace price_med = price_5_med if common_unit==5

foreach j in total purchase { 
	g qpc_`j' = q_`j' / adulteq
	}

levelsof itemcode, local(items)
foreach item in `items' {
	tab itemcode if itemcode==`item'
	foreach j in total purchase {	
		su qpc_`j' if itemcode==`item' & qpc_`j'>0 & qpc_`j'!=., detail /*excludes zero from the lower bound*/
		replace qpc_`j' = r(p95) if itemcode==`item' & qpc_`j'>0 & qpc_`j'!=. & qpc_`j'>r(p95)
		replace qpc_`j' = r(p5) if itemcode==`item' & qpc_`j'>0 & qpc_`j'!=. & qpc_`j'<r(p5)
		}
	su price_purchase if itemcode==`item' & q_purchase>0 & q_purchase!=., detail 
	replace price_purchase = r(p95) if itemcode==`item' & q_purchase>0 & q_purchase!=. & price_purchase>r(p95)
	replace price_purchase = r(p5) if itemcode==`item' & q_purchase>0 & q_purchase!=. & price_purchase<r(p5)
	}

replace spend=q_total*price_purchase if price_purchase != .
replace spend=q_total*price_med if price_purchase==. & spend==. & q_total > 0 & q_total!=.

duplicates tag `hh' itemcode, gen(dup)
tab dup // no duplicates 

drop price_1_med price_3_med price_5_med

tempfile consumed_consumption_mid2 
save `consumed_consumption_mid2', replace 


**  Now restore obverations for non-consumers   

use `sample', clear
merge 1:m `hh' using `consumed_consumption_mid2'

tab _merge
drop if _merge!=3 //none dropped 
drop _merge 

count if unit==common_unit /*this is true for all obs*/
count


*restore obs for households who did not consume (assume missing means non-consumption)
duplicates drop // 0 obs

rename spend foodcons

keep `hh' `admin0' `admin1' `admin2' `admin3' `ea' itemcode  eat q_total q_purchase foodcons unit foodcons price_purchase price_med
destring unit, replace 

*We reshape the data to wide so that we can input q_total=0, foodcons=0 for each item

drop if itemcode==. | unit==.

levelsof itemcode, local(items)

reshape wide eat q_total q_purchase unit foodcons price_purchase price_med , i(`hh') j(itemcode)

foreach i in `items' {
	replace eat`i'=0              if eat`i'==.
	replace q_total`i'=0          if q_total`i'==.
	replace foodcons`i'=0         if foodcons`i'==.
	replace q_purchase`i'=0       if q_purchase`i'==.
	}
	
*Reshape data back to long
reshape long eat q_total q_purchase unit foodcons price_purchase price_med, i(`hh') j(itemcode)
drop if q_total==.

codebook itemcode // 42 items 
tab itemcode

g foodgroup = .
label define foodgroups 1 staples 2 pulsenutfruitveg 3 animal 4 other
label values foodgroup foodgroups
replace foodgroup = 1 if inlist(itemcode,101,102,103,104,105,106,107,108,109,110,111,112,201,202,203,204,205,206,207)
replace foodgroup = 2 if inlist(itemcode,401,501,503,504,502,601,602,603,701,702,703)
replace foodgroup = 3 if inlist(itemcode,801,802,803,804,805,806,807,808,809,810,901,902,903)
replace foodgroup = 4 if inlist(itemcode,301,302,303,704,1001,1002,1003,1004,1101,1102,1103,1104,1105,1106,1107,1108)

tempfile foodcons_item_wk
save `foodcons_item_wk', replace


** merge with prices

merge m:1 `admin0' `admin1' `admin2' `admin3' `ea' itemcode using `prices_purchased', keepusing(price_1_med price_3_med price_5_med)
tab _m
drop _m

foreach u in 1 3 5 {
	replace price_med = price_`u'_med if price_med==. & price_`u'_med!=. & unit==`u'
	}

replace foodcons = price_med * q_total if q_total > 0 & price_med==.
drop if inlist(itemcode,503,504,805,806,810,903,1103,1105,1108) /*These items are too rarely consumed*/


** Generate price unit values now

duplicates report `hh' itemcode // no duplicates  

keep `hh' itemcode foodgroup eat unit q_total q_purchase foodcons price_purchase price_med 

tempfile foodcons_item_wk
save `foodcons_item_wk', replace


save "${TEMP}/Foodcons_item_wk_W2.dta", replace

collapse (sum) foodcons, by(foodgroup `hh')

reshape wide foodcons, i(`hh') j(foodgroup)

su foodcons*, detail

foreach var in foodcons1 foodcons2 foodcons3 foodcons4 {
	replace `var' = `var'*52 /*annualized*/
	winsor2 `var', suffix(imp) cuts(0 99) 
	replace `var' = `var'imp 
	}

egen foodcons = rowtotal(foodcons1 foodcons2 foodcons3 foodcons4)

tempfile foodcons_hh
save `foodcons_hh', replace


*****************************
**** Expenditures GOODS  ****
*****************************

** Goods consumption -- includes food (section K), non-food one-week recall items (section L), non-food annual items (section M)

* Non-food goods consumption - from sections L and M (some are goods some are services)
* Section L is non-food one week 
	* --> Goods include cigarettes, matches, kerosene, gas, petrol, soap, toiletries, personal products, cleaning products, bulbs, 
* Section M is non-food 12 months 
	* --> Goods include carpets, linens, mats, mosquito nets, mattresses, sports equipment, film

import excel using "${LOCDRIVE}\Parameters\GoodServiceCodes.xlsx", clear firstrow sheet("Tanzania_R2")

tempfile goodservicecodes
save `goodservicecodes', replace

use "${IN}\HH_SEC_L.dta", clear
rename hh_l01_2 purchase
rename hh_l02 exp

*drop if purchase==9 | purchase==. /*supposedly missing y/n on whether purchased - but we have exp data*/
replace purchase = 1 if purchase==2 & exp >0 & exp != .
replace purchase = 1 if purchase==. & exp >0 & exp != . & exp != 99 /*no cases of missing purchase and nonmissing exp*/ 

drop if purchase==2 

keep `hh' itemcode purchase exp
sort `hh'

tempfile 	 sectionL
save 		`sectionL', replace

save "${TEMP}\SectionL_raw_R2", replace


use "${IN}\HH_SEC_M.dta", clear
rename hh_m01_2 purchase
rename hh_m02 exp /*value not expenditures on purchased*/
replace exp = hh_m03 if (itemcode == 318 | itemcode == 319) & (exp==0 | exp==.) & (hh_m03 != . | hh_m03 != 0) /*bamboo and grass - exp reports value consumed and smq3 reports value purchased. because both refer to the full 12 months I think we should use value consumed primarily.*/ 

drop if (purchase == 2) & (exp == . | exp == 0)

keep `hh' itemcode purchase exp
sort `hh'

append using `sectionL'

sort itemcode
merge m:1 itemcode using `goodservicecodes', nogen keep(1 3) keepusing(recall good service other good_* service_*)

winsor2 exp, suffix(imp) cuts(0 99) by(itemcode) /*winsorize top 1 percent*/

foreach type in service good other {
	g exp_`type' = expimp * `type' * (12/recall) /*recall is the number of months per recall period*/
	}

loc goodservicecollapselist ""
foreach type in `gooddisagglist' `servicedisagglist' {
	g exp_`type' = expimp * `type' * (12/recall) /*recall is the number of months per recall period*/
	loc goodservicecollapselist `goodservicecollapselist' exp_`type'
	}
	
	
**Export item level data for descriptive analysis later
sort `hh' itemcode
save "${TEMP}\Tanzania_GoodService_Items_R2.dta", replace
	
collapse (sum) exp_good exp_service exp_other `goodservicecollapselist', by(`hh')

sort `hh'
merge 1:1 `hh' using `sample', nogen keep(1 3)

winsor2 exp_good exp_service exp_other `goodservicecollapselist', suffix(imp) cuts(0 99) by(`admin1')  /*winsorize top 0.5 percent by region*/

foreach g in good service other `gooddisagglist' `servicedisagglist' {
	rename exp_`g'imp exp_`g'_secLM
	drop exp_`g'
	}

sort `hh'
tempfile goodsandservices
save `goodsandservices', replace


********************************
**** Expenditures SERVICES  ****
********************************

** Services consumption
* Section F is food outside hh - 7 day value of expenditures by member on prepared food and bar and local brews --> YES
* Section J is housing --> rent? housing materials? use of water vender? --> YES
* Section C is education
* Section D is health, Children elsewhere section doesn't exist, section O is assistance and groups, section P is credit, section Q is crime/justice, section R is shocks, section S is deaths

* Food outside of house consumption (Section F)

use "${IN}\HH_SEC_F.dta", clear
keep `hh' `indiv' hh_f*

sort `hh'
merge m:1 `hh' using `sample', nogen keep(1 3) keepusing(`admin1' `admin0')

drop if hh_f01==2 | hh_f01==. /*no data are lost here*/

*3: 3 exp obs w 2 not coded to 1; 5: none; 7: none; 9: none; 11: one as no; 13: one as no; 15: one as no
foreach k in 3 5 7 9 11 13 15 {
	loc j = `k' - 1
	if `k'<10 {
	    loc kind 0`k'
		loc jind 0`j'
		}
		else if `k'>10 {
		    loc kind `k'
			loc jind `j'
			}
	bysort hh_f`jind': su hh_f`kind', detail
	replace hh_f`kind' = 0 if (hh_f`kind' == 99 | hh_f`kind' == 9) & hh_f`jind' != 1  /*no changes made*/
	}

egen exp_service_food = rowtotal(hh_f03 hh_f05 hh_f07 hh_f09 hh_f11 hh_f13 hh_f15), missing /*3 missing vals are zeros across the board*/

winsor2 exp_service_food, suffix(imp) cuts(0 99) by(`admin1' `admin0')
replace exp_service_food = exp_service_foodimp * 52 /*recall period is one week*/

collapse (sum) exp_service_food, by(`hh' `admin1' `admin0')

winsor2 exp_service_food, suffix(imp) cuts(0 99) by(`admin1'  `admin0')
replace exp_service_food = exp_service_foodimp

sort `hh' 
tempfile 	 services_food
save 		`services_food', replace


* Rent and housing (Section J)

use "${IN}\HH_SEC_J1.dta", clear

keep `hh' hh_j*
sort `hh'
merge 1:1 `hh' using `sample', nogen keep(1 3) keepusing(`admin1' `admin0')

rename hh_j02 exp_rent
recode exp_rent (.=0)
replace exp_rent = exp_rent * 12 /*annualized*/

winsor2 exp_rent, suffix(imp) cuts(0 99) by(`admin1')
replace exp_rent = exp_rentimp

rename exp_rent exp_service_rent

keep `hh' exp_service_rent
sort `hh'

tempfile	 services_rent
save		`services_rent', replace


* Education (Section C)

use "${IN}\HH_SEC_C.dta", clear

keep `hh' `indiv' hh_c28_* /*these are broken out but the tot variable is the sum of the components in all but 59 cases (6070 they are the same). Could be a difference between cash and in kind (total includes in kind)*/
sort `hh'
merge m:1 `hh' using `sample', nogen keep(1 3) keepusing(`admin1')

egen exp_good_educ = rowtotal(hh_c28_2 hh_c28_3), missing /*books and uniform*/
egen exp_service_educ = rowtotal(hh_c28_1 hh_c28_4 hh_c28_5 hh_c28_6 hh_c28_7), missing /*fees, transportation, tuition, other, food */

winsor2 exp_service_educ exp_good_educ, suffix(imp) cuts(0 99) by(`admin1')
replace exp_service_educ = exp_service_educimp
replace exp_good_educ = exp_good_educimp

recode exp_service_educ exp_good_educ (.=0)
collapse (sum) exp_service_educ exp_good_educ, by(`hh')

keep `hh' exp_service_educ exp_good_educ
sort `hh'

tempfile exp_educ
save `exp_educ', replace


* Health (Section D)
use "${IN}\HH_SEC_D.dta", clear

keep `hh' `indiv' hh_d* /*paper questionnaire questions are different than computer*/
sort `hh'
merge m:1 `hh' using `sample', nogen keep(1 3) keepusing(`admin1')

egen service_health_4w = rowtotal(hh_d07 hh_d09), missing /*monthly*/
egen service_health_hos = rowtotal(hh_d13 hh_d15), missing /*annual hospital and healer expenses*/
g exp_good_health = hh_d08 /*monthly*/

winsor2 service_health_4w service_health_hos exp_good_health, suffix(imp) cuts(0 99) by(`admin1')
foreach var in service_health_4w service_health_hos exp_good_health {
	replace `var' = `var'imp
	recode `var' (.=0)
	}

replace exp_good_health = exp_good_health * (52/4)
replace service_health_4w = service_health_4w * (52/4)

egen exp_service_health = rowtotal(service_health_4w service_health_hos)

collapse (sum) exp_service_health exp_good_health, by(`hh')

keep `hh' exp_service_health exp_good_health
sort `hh'

tempfile exp_health
save `exp_health', replace


*********************
**** Prices FOOD ****
*********************

** Use mean in base period and district as base #fixfixfix should I use the sample-wide mean instead?
*https://www.stata.com/statalist/archive/2003-01/msg00076.html
/*items dropped from HDS: paddy rice 101  green cob maize 103 cashew almonds other nuts 503 canned dried and wild vegetables 603 package fish 810 prepared tea coffee 1105 */

** Now generate the Fisher price indexes

use `foodcons_item_wk', clear
g unitv = foodcons/q_total
merge m:1 `hh' using `sample', nogen keepusing(urban `admin0' `admin1' `admin2' `admin3' `weight')

tempfile foodcons_item_sample
save `foodcons_item_sample', replace


*merge m:1 itemcode using `base_qty_price_adm0', nogen keepusing(x0_adm0 p0_adm0)
merge m:1 itemcode using "${TEMP}\Base_qty_price_adm0.dta", nogen keepusing(x0_adm0 p0_adm0)

g laspeyeres_num = price_med*x0_adm0
g laspeyeres_denom = p0_adm0*x0_adm0
g paasche_num = price_med*q_total
g paasche_denom = p0_adm0*q_total 

collapse (sum) laspeyeres_num laspeyeres_denom paasche_num paasche_denom eat, by(`hh')

g fisher = sqrt((laspeyeres_num/laspeyeres_denom)*(paasche_num/paasche_denom))

winsor2 fisher, suffix(imp) cuts(1 99)
replace fisher=fisherimp

rename fisher pr_foodtot

g lpr_food = log(pr_foodtot)


** Save dataset

keep `hh' lpr_food pr_foodtot 
sort `hh'  

tempfile 	 prices_foodtot
save 		`prices_foodtot', replace

use `sample', clear
merge 1:1 `hh' using `prices_foodtot',  nogen keep(1 3)

tempfile 	 prices_foodtot
save 		`prices_foodtot', replace


****************************
**** Total Expenditures ****
****************************

use "${IN}\TZY2.HH.Consumption.dta", clear
keep `hh' expm foodIN foodOUT utilities hhexpenses health transport communic recreat educa other adulteq hhsize
 
rename hhsize hhsize_adeq 

*egen exp_sagg_serv = rowtotal(foodOUT utilities hhexpenses health transport communic recreat educa)
*egen exp_sagg_good = rowtotal(foodIN)
*egen exp_sagg_other = rowtotal(other)

g pcexp = expm / hhsize /*annual nominal expenditures*/
g pcexp_adeq = expm / hhsize_adeq
rename expm hhexp

winsor2 hhexp pcexp pcexp_adeq, suffix(imp) cuts(0 99)
replace pcexp=pcexpimp
replace pcexp_adeq=pcexp_adeqimp
replace hhexp=hhexpimp 

keep `hh' hhexp pcexp pcexp_adeq hhsize hhsize_adeq /*exp_sagg**/
sort `hh'

tempfile	 expenditures
save		`expenditures', replace
*


***********************
**** Covariates ****
***********************/

** Education variables

use "${DATADRIVE}\OtherRaw\TimeuseAll_indiv_${YEAR}.dta" 

g educ_yrs_head = educ_yrs if relationtohead==1
g female_head = female if relationtohead==1
g age_head = age if relationtohead==1
g educ_yrs_spouse = educ_yrs if relationtohead==2 
g age_spouse = age if relationtohead==2

sort `hh' relationtohead
collapse (firstnm) female_head age_head educ_yrs_head (max) educ_yrs_max=educ_yrs educ_yrs_spouse age_spouse, by(`hh')
count if female_head==. 
count if age_head==.
count if educ_yrs_head==.
replace educ_yrs_head = educ_yrs_spouse if educ_yrs_head==.
replace age_head = age_spouse if age_head==.

sort `hh'
keep `hh' educ_yrs_head educ_yrs_max

tempfile educ
save `educ', replace


** Dependency Variables

use "${DATADRIVE}\OtherRaw\TimeuseAll_indiv_${YEAR}.dta", clear
keep `hh' `indiv' age tobedropped

sort `hh' `indiv'
merge 1:1 `hh' `indiv' using "${IN}\HH_SEC_B.dta", keepusing(hh_b04)
replace age = hh_b04 if age==.
preserve

g pop_dep = ( age < 15 | age > 65 ) & age != . & tobedropped==0
g pop_indep = ( age >= 15 & age <= 65 ) & age != . & tobedropped==0

collapse (sum) pop_dep pop_indep, by(`hh')
count if pop_dep==.
count if pop_dep==0 & pop_indep==0
count if pop_indep==.
count if pop_indep==0

g hh_depshare = (pop_dep + 0.1) / (pop_dep + pop_indep + 0.1) /*the 0.1 is to prevent dividing by zero if age missing from all*/
count if hh_depshare==.
	
keep `hh' hh_depshare
tempfile depshare
save `depshare', replace


** Participation Variables 

use "${DATADRIVE}\OtherRaw\TimeuseAll_hh_${YEAR}.dta", clear 

g pcmh_farm = p_ownfarm==1 & h_ownfarm > 35 /*"Min hours participation" participation - at least 35 hours per year*/
g pcmh_ent = p_ownent == 1 & h_ownent > 35
g pcmh_market = p_wagelbr == 1 & h_wagelbr > 35


keep `hh' pcmh_* 
tempfile pcmh
save `pcmh', replace


** Asset index

use "${IN}\HH_SEC_N.dta", clear
/* Asset list same as R1
         401 Radio and Radio Cassette - yes
         402 Telephone(landline) - yes
         403 Telephone(mobile) - yes
         404 Refrigerator or freezer - yes
         405 Sewing Machine - yes
         406 Television - yes
         407 Video / DVD - yes
         408 Chairs - yes
         409 Sofas - yes
         410 Tables - yes
         411 Watches - yes
         412 Beds - yes
         413 Cupboards, chest-of-drawers, boxes, wardrobes, bookcases - yes
         414 Lanterns - yes
         415 Computer - yes
         416 Cooking pots, Cups, other kitchen utensils - yes
         417 Mosquito net - yes
         418 Iron (Charcoal or electric) - yes
         419 Electric/gas stove - yes
         420 Other stove - yes
         421 Water-heater - yes
         422 Record/cassette player, tape recorder - yes
         423 Complete music system - yes
         424 Books (not school books) - yes
         425 Motor Vehicles - yes
         426 Motor cycle - yes
         427 Bicycle - yes
         428 Carts
         429 Animal Cart
         430 Boat/canoe
         431 Wheel barrow
         432 Livestock
         433 Poultry
         434 Outboard engine
         435 Donkeys
         436 Fields/Land
         437 House(s) - yes
         438 Air-conditioned - yes
         439 Dish antenna/decoder - yes
         440 Hoes
         441 Spraying machine
         442 Water pumping set
         443 Reapers
         444 Tractor
         445 Trailer for tractors etc.
         446 Plough etc.
         447 Harrow
         448 Milking machine
         449 Harvesting and threshing machine
         450 Hand milling machine
         451 Coffee pulping machine
         452 Fertilizer distributor
*/
*

loc assetlist 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 437 438 439 

g keep=0
foreach asset in `assetlist' {
	replace keep = 1 if itemcode==`asset'
	}

keep if keep==1
drop keep

rename hh_n01_2 count_
reshape wide count_, i(`hh') j(itemcode)

foreach asset in `assetlist' {
	replace count_`asset' = 0 if count_`asset'==.
	}
	
pca count_401 - count_439

predict wealthscore

keep `hh' wealthscore
la var wealthscore "Consumptive asset index"

tempfile wealthscore
save `wealthscore', replace

merge 1:1 `hh' using `wealthscore', nogen keep(1 3)
sort `hh'


** Build the covariates dataset

use `sample', clear
sort `hh'
merge 1:1 `hh' using `educ', nogen keep(1 3) keepusing(educ_yrs_head educ_yrs_max)
sort `hh'
merge 1:1 `hh' using `depshare', nogen keep(1 3) keepusing(hh_depshare)
sort `hh'
merge 1:1 `hh' using `pcmh', nogen keep(1 3) keepusing(pcmh_farm pcmh_ent pcmh_market)
sort `hh'
merge 1:1 `hh' using `wealthscore', nogen keep(1 3) keepusing(wealthscore)

tempfile covariates
save `covariates', replace


*********************************
**** Dataset Assembley ****
********************************/

* Put tegether full dataset

use `sample', clear
merge 1:1 `hh' using `foodcons_hh', nogen keep(1 3)
sort `hh'
merge 1:1 `hh' using `goodsandservices', nogen keep(1 3)
sort `hh'
merge 1:1 `hh' using `services_food', nogen keep(1 3)
sort `hh'
merge 1:1 `hh' using `services_rent', nogen keep(1 3)
sort `hh'
merge 1:1 `hh' using `exp_educ', nogen keep(1 3)
sort `hh'
merge 1:1 `hh' using `exp_health', nogen keep(1 3)
sort `hh'
merge 1:1 `hh' using `expenditures', nogen keep(1 3)
sort `hh'
merge 1:1 `hh' using `covariates', nogen keep(1 3)
sort `hh'
merge 1:1 `hh' using `intdate', nogen keep(1 3)
sort `hh'
save "${TEMP}\Tanzania_HH_R2.dta", replace
*

**************
log close
